package cn.com.shuyangyang.util;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * File utility class: dumps the text content of zip archives, extracts text
 * from PDF files, and re-decodes strings with a different charset.
 * 
 * @author ShuYangYang E-Mail:shuyangyang@aliyun.com
 *         http://www.shuyangyang.com.cn Date: 2015-08-29 10:08:50 AM
 * 
 */
public class FileUtil {

	/**
	 * Manual demo entry point: dumps a zip archive and prints the text of a
	 * PDF, both at hard-coded local paths. Any exception is printed to
	 * standard error.
	 * 
	 * @param args
	 *            ignored
	 */
	public static void main(String[] args) {
		try {
			readZipFile("E:\\招财宝日结文件\\5202\\2015070700\\bill\\bill_1.zip");// read a zip file
			System.out.println(getTextFromPDF("C:\\Users\\Administrator\\Desktop\\Shiro教程.pdf"));// read a PDF file
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints the content of every non-directory entry of a zip archive.
	 * <p>
	 * For each entry a header line {@code file - <name> : <size> bytes} is
	 * written to standard error, then the entry's content, decoded as GBK
	 * text, is written line by line to standard output, followed by one empty
	 * line. The archive and every entry stream are closed before this method
	 * returns, whether it completes normally or throws.
	 * 
	 * @param file
	 *            path of the zip archive to read
	 * @throws Exception
	 *             if the archive cannot be opened or an entry cannot be read
	 */
	public static void readZipFile(String file) throws Exception {
		// A single ZipFile handle is enough: it both enumerates the entries
		// and provides their content, and it is local so that concurrent
		// calls do not share state.
		try (ZipFile zipFile = new ZipFile(file)) {
			Enumeration<? extends ZipEntry> entries = zipFile.entries();
			while (entries.hasMoreElements()) {
				ZipEntry entry = entries.nextElement();
				if (entry.isDirectory()) {
					continue;
				}
				long size = entry.getSize();
				System.err.println("file - " + entry.getName() + " : "
						+ size + " bytes");
				// Skip only entries known to be empty; getSize() returns -1
				// when the size is unknown, and such entries must still be
				// read.
				if (size != 0) {
					printEntryLines(zipFile, entry);
				}
				System.out.println();
			}
		}
	}

	/**
	 * Writes the content of one zip entry, decoded as GBK, to standard output
	 * line by line and closes the entry stream afterwards.
	 */
	private static void printEntryLines(ZipFile zipFile, ZipEntry entry)
			throws IOException {
		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(zipFile.getInputStream(entry), "GBK"))) {
			String line;
			while ((line = reader.readLine()) != null) {
				System.out.println(line);
			}
		}
	}
	
	/**
	 * Extracts the text of a PDF file.
	 * <p>
	 * Failures are not propagated: if the file cannot be found or an I/O
	 * error occurs, the stack trace is printed and {@code null} is returned.
	 * 
	 * @param pdfFilePath
	 *            path of the PDF file to parse
	 * @return the text produced by {@link PDFTextStripper}, or {@code null}
	 *         if the file could not be opened or parsed
	 */
	public static String getTextFromPDF(String pdfFilePath) 
	{
		String result = null;
		FileInputStream is = null;
		PDDocument document = null;
		try {
			is = new FileInputStream(pdfFilePath);
			PDFParser parser = new PDFParser(is);
			parser.parse();
			document = parser.getPDDocument();
			PDFTextStripper stripper = new PDFTextStripper();
			result = stripper.getText(document);
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close both resources independently so that a failure closing
			// one does not prevent closing the other.
			if (is != null) {
				try {
					is.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (document != null) {
				try {
					document.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return result;
	}

	/**
	 * Re-decodes a string with another charset: the string is encoded to
	 * bytes with the platform default charset and those bytes are then
	 * decoded using {@code newCharset}.
	 * <p>
	 * NOTE(review): because the encoding step depends on the platform default
	 * charset, the result can differ between machines; confirm that callers
	 * rely on this on purpose.
	 * 
	 * @param str
	 *            the string to convert; may be {@code null}
	 * @param newCharset
	 *            name of the charset used to decode the bytes
	 * @return the re-decoded string, or {@code null} if {@code str} is
	 *         {@code null}
	 * @throws UnsupportedEncodingException
	 *             if {@code newCharset} is not a supported charset name
	 */
	public static String changeCharset(String str, String newCharset)
			throws UnsupportedEncodingException {
		if (str != null) {
			// Encode the string with the platform default charset.
			byte[] bs = str.getBytes();
			// Build a new string by decoding those bytes with the new charset.
			return new String(bs, newCharset);
		}
		return null;
	}
}
